{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 使用信息熵寻找最优划分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "\n",
    "# Load scikit-learn's bundled iris dataset.\n",
    "iris = datasets.load_iris()\n",
    "# Keep only the feature columns from index 2 onward; the plots below use the\n",
    "# first two of them as axes (presumably petal length/width -- verify via iris.feature_names).\n",
    "X = iris.data[:,2:]\n",
    "y = iris.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=2,\n",
       "            max_features=None, max_leaf_nodes=None,\n",
       "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "            min_samples_leaf=1, min_samples_split=2,\n",
       "            min_weight_fraction_leaf=0.0, presort=False, random_state=42,\n",
       "            splitter='best')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "\n",
    "# Tree limited to depth 2 that scores splits with the entropy criterion;\n",
    "# random_state is pinned so repeated runs build the same tree.\n",
    "dt_clf = DecisionTreeClassifier(max_depth=2, criterion=\"entropy\", random_state=42)\n",
    "dt_clf.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_decision_boundary(model, axis):\n",
    "    \"\"\"Shade the regions that `model` assigns to each class over a rectangle.\n",
    "\n",
    "    model: object with a predict() method that accepts an (n_points, 2) array.\n",
    "    axis:  sequence read as [x_min, x_max, y_min, y_max] bounds of the area.\n",
    "\n",
    "    Draws filled contours with pyplot and returns nothing.\n",
    "    \"\"\"\n",
    "    x_min, x_max, y_min, y_max = axis[0], axis[1], axis[2], axis[3]\n",
    "\n",
    "    # Sample each direction at 100 points per unit of axis length.\n",
    "    xs = np.linspace(x_min, x_max, int((x_max - x_min) * 100))\n",
    "    ys = np.linspace(y_min, y_max, int((y_max - y_min) * 100))\n",
    "    grid_x, grid_y = np.meshgrid(xs, ys)\n",
    "\n",
    "    # One row per grid point, then fold the predictions back into the grid shape.\n",
    "    grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]\n",
    "    labels = model.predict(grid_points).reshape(grid_x.shape)\n",
    "\n",
    "    from matplotlib.colors import ListedColormap\n",
    "    region_colors = ListedColormap(['#EF9A9A','#FFF59D','#90CAF9'])\n",
    "\n",
    "    plt.contourf(grid_x, grid_y, labels, cmap=region_colors)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAHDpJREFUeJzt3X2QHPV95/H3dx+Q0LJiwRJIIJk1BYmNSZDCnoytKxcVOxwgyriIuZPP59guKkolxgd3qbo60BVPZ4ekrsqOyvjsUoALJD6IebCjBBFjl0l4cPGwkkUMiCvrsDkpElpAT6sn0O5+74/p3Z3t7dnp6emZfpjPq2qL6d/8uucrUfqq1f2ZX5u7IyIi5dKVdQEiIpI+NXcRkRJScxcRKSE1dxGRElJzFxEpITV3EZESqtvczWy+mb1gZi+Z2StmdnvEnHlm9jdmtsPMnjezwVYUKyIi8cQ5c38X+G13vwhYAVxuZpeE5lwH7Hf384BvAH+WbpkiItKIus3dKw4Hm73BT/ibT1cD9wWvHwY+YWaWWpUiItKQnjiTzKwb2AKcB3zL3Z8PTTkb2Ang7mNmdhB4H/B26DjrgHUAffPmXXz+kiXNVS8Neefk92Vdgog0aedrW99298X15sVq7u4+DqwwswHg+2Z2obu/XDUl6ix91roG7r4R2AiwcnDQn1y/Ps7HS0ruW/GFrEsQkSbduGreG3HmNZSWcfcDwD8Cl4fe2gUsBzCzHuBUYF8jxxYRkfTEScssDs7YMbOTgU8Cr4WmbQImTws/A/zEtSKZiEhm4lyWWQrcF1x37wK+5+5/b2Z3AMPuvgm4B/grM9tB5Yx9bcsqFhGRuuo2d3f/Z2BlxPgtVa+PA9emW5qIiCSlb6iKiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlFDd5m5my83sSTPbbmavmNkNEXMuNbODZrYt+LmlNeWKiEgcPTHmjAF/7O5bzawf2GJmP3L3V0Pznnb3q9IvUUSKZMeBxxkeuYvDY3s5pedMhs64nvMGrki0H5DoWBKjubv7HmBP8HrUzLYDZwPh5i4iHW7Hgcd5es9XGffjABwee5On93wVYM6mHLXfP+2+DTNjwk80dCypaOiau5kNAiuB5yPe/qiZvWRmj5vZh1OoTUQKZnjkrqkGPWncjzM8clfD+zljU429kWNJRZzLMgCY2SnAI8CN7n4o9PZW4Bx3P2xmVwI/AM6POMY6YB3AstNPT1y0iOTT4bG9DY3HfT/p3E4W68zdzHqpNPbvuvuj4ffd/ZC7Hw5ebwZ6zWxRxLyN7j7k7kOL+vubLF1E8uaUnjMbGo/7ftK5nSxOWsaAe4Dt7v71GnOWBPMws1XBcd9Js1ARyb+hM66n2+bPGOu2+VM3RxvZz+ihy3obPpZUxLkssxr4PPBzM9sWjN0MvB/A3b8DfAb4QzMbA44Ba93dW1CviOTY5I3ORhMutfZLciypiJOWeQawOnPuAnSXQyRnksYS43p29528duBRnAmMLj44cA2rz7op0WecN3BF5H5q5snEvqEqIsWSNJYY17O772T7gYentp2Jqe3VZ93U9PGlOVp+QKSkksYS43rtwKxsxZzj0l5q7iIllTSWGJcz0dC4tJeau0hJJY0lxmU12ketcWkv/V8QKamkscS4PjhwTUPj0l66oSpSUkljiXFN3jSNSstI9tTcRTpM3NUXo8bCfzGsPuumWc08TvwybkSz1VHOpPJaVzU1d5GSirvS4lN7bsfdccbmnJd0dcfwfnEjmq2OciaV17rCdM1dpKTirrQ4
4SemGvtc85Ku7hjeL25Es9VRzqTyWleYmrtISbVi9cSkqztWj8eNaLY6yplUXusKU3MXKalWrJ6YdHXH6vG4Ec1WRzmTymtdYWruIiUVd6XFLuvFQrffkq7IGCd+GTei2eooZ1J5rStMN1RFSqqRlRbjjiVd3bF6v7gRzVZHOZPKa11hltXKvCsHB/3J9esz+exOdd+KL2RdgpRM0SONUfJe642r5m1x96F683TmLiKJFD3SGKVItdaja+4ikkjRI41RilRrPWruIpJI0SONUYpUaz1q7iKSSNEjjVGKVGs9au4ikkjRI41RilRrPbqhKiKJFD3SGKVItdaj5i4iidV6qHXSeXlQpFrnouYuIpHCee/lp/xrdh5+JpUvP+UlS56XOlpBzV1EZonKe28/8PDU+40sH5x0yd9Wy0sdraIbqiIyS1TeOyzu8sFJl/xttbzU0Spq7iIyS9q57iRL/rZaXupoFTV3EZkl7Vx3kiV/Wy0vdbSKmruIzBKV9w6Lu3xw0iV/Wy0vdbSKbqiKyCxRee+00jJ5yZLnpY5Wqbvkr5ktB+4HlgATwEZ33xCaY8AG4ErgKPBFd98613G15G/7acnfcmllVLHomok45j0emeaSv2PAH7v7VjPrB7aY2Y/c/dWqOVcA5wc/HwG+HfxXRFogTlQxKpYYFV8sU/wPmos4likeWfeau7vvmTwLd/dRYDtwdmja1cD9XvEcMGBmS1OvVkSAeFHFqFhiVHyxTPE/aC7iWKZ4ZEM3VM1sEFgJPB9662xgZ9X2Lmb/BYCZrTOzYTMbfnt0tLFKRWRKK6OKRddMxLFM8cjYzd3MTgEeAW5090PhtyN2mXUx3903uvuQuw8t6u9vrFIRmdLKqGLRNRNxLFM8MlZzN7NeKo39u+7+aMSUXcDyqu1lwO7myxORKHGiilGxxKj4Ypnif9BcxLFM8ci6N1SDJMw9wHZ3/3qNaZuA683sQSo3Ug+6+570yhSRaq2MKhZdMxHHMsUj46RlVgOfB35uZtuCsZuB9wO4+3eAzVRikDuoRCG/lH6pIlItvDTtjgOPs/PwMzPm7D26jSNjI4BzZGyEvUe3sfqsm3K7SmNY0rqaWba3Y5b8dfdniL6mXj3HgS+nVZSINCYqwvdPu2/FGZ+a40xMxSVXn3XTnPvmIf6X17qKQssPiJRAVISvurFXe+3AzNtmeY3/5bWuolBzFymBRqJ6zkSsfbOO/+W1rqJQcxcpgUaiehb6Y5/X+F9e6yoKNXeREoiK8BndkXM/OHBN3X3zEP/La11FoVUhRUqgVoRv79FtvHbgUZwJjC4+OHDNjJupc+2b9U3LvNZVFGruIgXw8i9+nxfe28IElX9urzrpYg72nTurca/9tcdm7TuZfe/rOYMzF6yIPH6c+N+zu++c9XlnLlgRq/nGiTTWmhOnmec1ypklNXeRnHv5F7/Pc+9tAaskkiegsn1ielXtqJhjmlHCZ3ffOWPVycnP237gESZXGql1/Dh1aCXH9Omau0jOvVDV2KeEtwPVMcc0o4Th+OS0mUtIRR0/Th1ayTF9au4iOTdRf8qU6phjmlHCcHxyLuHjx6lDKzmmT81dJOca+UNaHXNMM0oYjk/OJXz8OHVoJcf0qbmL5Nyqky6G8OMwazweszrmmGaUMByfnDbz8lDU8ePUoZUc06cbqiI5d+H5fwEx0zLVMcc0o4STx02SlolTh1ZyTF/dB2S3ih6Q3X56QLZI8aX5gGwpiRs+/NdZl9DxHntjOxtefpY3j46yZEE/N1y4mjXnfCidg786DE9thkP7YeFp8PEr4YK6PUAK5saY89TcRdrksTe2c9uWH3N8vPLQ6j1HR7lty48Bmm/wrw7DP3wPxoKHXx/aX9kGNfgOpRuqIm2y4eVnpxr7pOPjY2x4+dnmD/7U5unGPmnsRGVcOpKau0ibvHl0tKHxhhza39i4lJ6au0ibLFnQ39B4Qxae1ti4lJ6au0ib3HDhauZ3z7zN
Nb+7hxsuXN38wT9+JfT0zhzr6a2MS0fSDVWRNpm8adqStMzkTVOlZSSg5i7SRmuOHGHNzt3TDfgDR2ZPShppvGBo9rycxCNbGgGVSGruIu0SJ66YZqQxJ/HIlkZApSZdcxdplzhxxTQjjTmJR7Y0Aio1qbmLtEucuGKakcacxCNbGgGVmtTcRdolTlwxzUhjTuKRLY2ASk1q7iLtEieumGakMSfxyJZGQKUm3VAVaZc4ccU0I405iUe2NAIqNdVt7mZ2L3AVMOLuF0a8fynwt8Avg6FH3f2ONIsUyZVm4oVP/h0cOVR5fWg/PPHI7GPF9cRD8NJz4BNgXTz2od9gg43NaKD09bFh+Vm8ebS/MtbXx5rGf8VNW3POh9TM2yzOmftfAncB988x52l3vyqVikTyrJl44bdunW7sk947XvmZPNZjD0CXwfj43Md/4iHY9tOpzccWzOe2I+9wvKtypXXP0VH+24s/xMw4MTExNaYIYueoe83d3Z8C9rWhFpH8ayZeGG7sUXxiurHPdfyXnpuxueG0ganGPrWb+1Rjn6QIYudI64bqR83sJTN73Mw+XGuSma0zs2EzG357VDEoKaCs4oXh4/vMpv1mT3fsQymC2BnSaO5bgXPc/SLgm8APak10943uPuTuQ4v6FYOSAsoqXhg+vs38o7tkLHS2PwdFEDtD083d3Q+5++Hg9Wag18wWNV2ZSB41Ey/sW1h/jnVBd+gsPOr4F10yY/OG/QeYH7oE02NGb+hSjSKInaPp5m5mS8zMgtergmO+0+xxRXLpgiG4/N9On0kvPK2yHSct8+XbZzf4k+bPPNaaz8IVa+sf/7JrYcXHps7g1xw9zm1972Ppgn4MWLqgn6/+q3/Dfx+6bMbYbRd/UjdTO0ScKOQDwKXAIjPbBdwK9AK4+3eAzwB/aGZjwDFgrbt7yyoWSUuaqy9GCUUVueiSSoOPqqHarl/C6MHK69GDlW2YXetl11Z+AmuCn7BWNnOt9phfllUfXjk46E+uX5/JZ3eqgc/Fv+lWeuFII1Quf8Q9C68nFFWcsuJj0w05qgbrmnWzNHI8zVoTCq/2CJXLPvrXQWtZ33Vb3L3u/3gtPyCdqdUrJoaiipHjUTVENfao8Rw8/FqrPeabmrt0plZHGuM06WY/K+OHX2u1x3xTc5fO1OpIo9X4o1U93uxnZfzwa632mG9q7tKZWr1iYiiqGDkeVUOcvxQgFw+/1mqP+aZVIaUztXrFxMmbpuG0TFW6pWYNu345e79lH8h8dccwrfaYb0rLdBClZUSKL25aRmfuUmzNLL8b5cH/Cf/vF9PbfQvh6OGZZ9Ew+8w6aizqbBtmjp37IXh9e67OyKUcdObeQUp35p52Vj3c2JtlBtV/vrq7YcJrJ2kgF/l1yTfl3KX80s6qp9nYYWZjh8pSvnM1dshFfl3KQc1diiur5Xdbrej1Sy6ouUtxZbX8bqsVvX7JBTV3Ka60s+rvP7/5mqpVFkud1t1dO8c+KQf5dSkHNXcprmaW342y9o9mN/i+hdMN2boqC39VLbU759iafz+ztivWVpb0rR5b8bH06hepoiikFFvc5XejRMUof3MVHHh77mjiq8PT8cX+UyuRxwuGZn5BaXJeknrTjndKR1Jzl84UjlEe2g+PPQBdNv2A6kP7K3NgurlG7Ree08i8OHXF2U8kRJdlpDPVWm53PPQs0nA0MW78MmlMs9VLEUvHUHOXztRI3LB6btz4ZdKYZlnjndJ2au7SmRqJG1bPjRu/TBrTLGu8U9pOzV06U63ldrtDSzSEo4lx45dJY5qtXopYOoZuqEpnqrXcbtRY9Y3MuEsFJ11SuNVLEUvHUHOXdOU1xhdVV5Q4UcW48cukMc1m4p0iATV3SU9eY3xRdT3+4MwVGvNSq0hKdM1d0pPXGF9UXVErNOahVpGUqLlLevIa40saexQpMDV3SU9eY3xJY48iBabmLunJa4wvqq6oFRrzUKtISnRDVdKT1xhf
0tijSIHVbe5mdi9wFTDi7hdGvG/ABuBK4CjwRXffmnahUhBJY3xPPDT7AdPhVRbjzqsVx4yqK2rFRz3AWkogzmWZvwQun+P9K4Dzg591wLebL0s6yhMPwbafTqdXfKKy/cRDjc+bjD1O3hidjDhGLb8bFrXvtp8mO5ZIxuo2d3d/Ctg3x5Srgfu94jlgwMyWplWgdICXnos3HmdeM3HMqH3DFJeUgkjjhurZwM6q7V3B2Cxmts7Mhs1s+O3R0RQ+WkohnDevNR5nXjNxzLgxSMUlpQDSaO4WMeZRE919o7sPufvQov7+FD5aSqHWc0XD43HmNRPHjBuDVFxSCiCN5r4LWF61vQzYncJxpVNcdEm88TjzmoljRu0bprikFEQazX0T8HtWcQlw0N33pHBc6RSXXRv9gOlwCibOvGYemh21rx5gLQVl7pFXUKYnmD0AXAosAvYCtwK9AO7+nSAKeReVRM1R4EvuXjdOsHJw0J9cv76p4qUxA5/rrj9JRHLN+q7b4u51zzDq5tzd/bN13nfgyw3UJiIiLablB0RESkjNXUSkhNTcRURKSM1dRKSE1NxFREpIzV1EpITU3EVESkjNXUSkhNTcRURKSM1dRKSE1NxFREpIzV1EpITU3EVESkjNXUSkhNTcRURKSM1dRKSE1NxFREpIzV1EpITU3EVESkjNXUSkhOo+IFvq2zx6Mt/c18+bY90s6RnnK6ePcmX/sazLEpEOpubepM2jJ3PHW6dy3Cv/CNoz1sMdb50KoAYvIpnRZZkmfXNf/1Rjn3Tcu/jmvv6MKhIRUXNv2ptj3Q2Ni4i0g5p7k5b0jDc0LiLSDmruTfrK6aPMt4kZY/Ntgq+cPppRRSIiuqHatMmbpkrLiEiexGruZnY5sAHoBu529z8Nvf9F4H8A/xIM3eXud6dYZ65d2X9MzVxEcqVuczezbuBbwO8Au4AXzWyTu78amvo37n59C2osJGXfRSRLca65rwJ2uPvr7v4e8CBwdWvLKrbJ7PuesR4cm8q+bx49OevSRKRDxGnuZwM7q7Z3BWNhv2tm/2xmD5vZ8lSqKyhl30Uka3Gau0WMeWj774BBd/9N4MfAfZEHMltnZsNmNvz2aHnTJMq+i0jW4jT3XUD1mfgyYHf1BHd/x93fDTb/Arg46kDuvtHdh9x9aFF/ec9ilX0XkazFae4vAueb2QfM7CRgLbCpeoKZLa3a/BSwPb0Si0fZdxHJWt20jLuPmdn1wA+pRCHvdfdXzOwOYNjdNwH/0cw+BYwB+4AvtrDm3FP2XUSyFivn7u6bgc2hsVuqXt8E3JRuae0VN7q4btfpvPDuvKntVfPe5dOnHpu1b5qfKSLSKH1DlfjL9k439ul7zC+8O48XR+bhwdiesR5uHRnAccaofTwtFSwiraS1ZYgfXQw39gqbauyTTmBTjb3W8RSXFJFWUnOnfdHF6uMpLikiraTmTvuii9XHU1xSRFpJzZ340cVV895l9ve3HAuN9eL0MPfxFJcUkVZSc6dyA/OWxQdZ2jOG4SztGeOWxQdn3djcuGxfVYOv/Kya9y5fO+PAjH1vP+MAd5wx9/HifqaISBJKywR+dqyXvWPdOLB3rJufHevlBwdPjow97tzXMxVf/PSpxyKX/I2zSJiWChaRVlFzB742spCHRvuYTMJMQLAN9WKPUfFFxRxFJGu6LAM8UtXYp1nkWDj2GBVfVMxRRLKm5g6hW5+NC8cXFXMUkaypudP8b0I4vqiYo4hkTc0d+N3+I0RFHOPEHqPii4o5ikjW1NyB9Wcc4tr+I3QFDb0L59r+I7Fij1HxRcUcRSRrpUvLxF1p8WsjC3lktI8JKn/D9TExde19Athy9CTeGJ/527Pl3ZPYPtLL6NSCYN3cObKQb7zVz1s+fT19sY3znxbXP0vXqpAi0iqlau5xI4hR0cdKw55Owrw+3hu8mh4bB0ZDKZpRuhj1mfPe8m5uHhmYGtOqkCLSbqW6LBM3glg7+hje
Tm9Mq0KKSDuVqrnHjSA2G31MSqtCiki7lKq5x40gZvWL1qqQItIupWrucSOItaOP4e30xrQqpIi0U6mae9wIYlT0sZ8JqmOP53afoLtqG5zuiHn9TLDYxmeMLbZx/qROZFJxSRFpJXMPn3W2x8rBQX9y/fpMPrtTDXxO1/NFis76rtvi7kP15pUqCllL0jx51H53v9NXFZOEc7tP8Ojg260sX0SkYaW6LBNlMk++Z6wHx6by5PXWW4/a7+aRgaCx29TP6+O9XPOrRe34pYiIxFb65p40Tx61X61Me/WZvIhIHpS+uSfNkytvLiJFVvrmnjRPrry5iBRZ6Zt70jx51H61Mu3ndp9ouk4RkTSVvrknzZNH7fcnZxwIGvnMPLzSMiKSN7GikGZ2ObAB6Abudvc/Db0/D7gfuBh4B/h37v6rdEtN7sr+Y4m+HBS1n75kJCJFUPfM3cy6gW8BVwAXAJ81swtC064D9rv7ecA3gD9Lu1AREYkvzmWZVcAOd3/d3d8DHgSuDs25GrgveP0w8AkzC2cGRUSkTeJcljkb2Fm1vQv4SK057j5mZgeB9wEzLkab2TpgXbB5+LR16/5PkqJjWhT+/IJJv/519aekSL//2Sly7aD66zknzqQ4zT3qDDwcGYkzB3ffCGyM8ZlNM7PhOOsv5JXqz1aR6y9y7aD60xLnsswuYHnV9jJgd605ZtYDnArsS6NAERFpXJzm/iJwvpl9wMxOAtYCm0JzNgFfCF5/BviJZ7XcpIiI1L8sE1xDvx74IZUo5L3u/oqZ3QEMu/sm4B7gr8xsB5Uz9rWtLDqmtlz+aSHVn60i11/k2kH1pyKz9dxFRKR1Sv8NVRGRTqTmLiJSQqVr7mZ2r5mNmNnLWdeShJktN7MnzWy7mb1iZjdkXVNcZjbfzF4ws5eC2m/PuqYkzKzbzH5mZn+fdS2NMrNfmdnPzWybmQ1nXU+jzGzAzB42s9eCPwMfzbqmuMzs14Pf98mfQ2Z2Y2b1lO2au5l9HDgM3O/uF2ZdT6PMbCmw1N23mlk/sAX4tLu/mnFpdQXfSu5z98Nm1gs8A9zg7s9lXFpDzOw/A0PAQne/Kut6GmFmvwKG3L2QXwIys/uAp9397iCdt8DdD2RdV6OCZVv+BfiIu7+RRQ2lO3N396cocMbe3fe4+9bg9Siwnco3gHPPKw4Hm73BT6HOHsxsGbAGuDvrWjqNmS0EPk4lfYe7v1fExh74BPB/s2rsUMLmXiZmNgisBJ7PtpL4gksa24AR4EfuXpjaA38O/BcgvJh/UTjwhJltCZb7KJJzgbeA/xVcFrvbzPqyLiqhtcADWRag5p5TZnYK8Ahwo7sfyrqeuNx93N1XUPkm8yozK8ylMTO7Chhx9y1Z19KE1e7+W1RWcf1ycJmyKHqA3wK+7e4rgSPAf822pMYFl5M+BTyUZR1q7jkUXK9+BPiuuz+adT1JBP+c/kfg8oxLacRq4FPBdesHgd82s7/OtqTGuPvu4L8jwPeprOpaFLuAXVX/2nuYSrMvmiuAre6+N8si1NxzJrgpeQ+w3d2/nnU9jTCzxWY2ELw+Gfgk8Fq2VcXn7je5+zJ3H6Tyz+qfuPt/yLis2MysL7gJT3A54zKgMKkxd38T2Glmvx4MfQLIfZAgwmfJ+JIMxHwSU5GY2QPApcAiM9sF3Oru92RbVUNWA58Hfh5cuwa42d03Z1hTXEuB+4KkQBfwPXcvXJywwM4Evh88SqEH+N/u/g/ZltSwrwDfDS5tvA58KeN6GmJmC4DfAf4g81rKFoUUERFdlhERKSU1dxGRElJzFxEpITV3EZESUnMXESkhNXcRkRJScxcRKaH/D6v9Z7UVcMyCAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Shade the tree's predicted regions over the [x_min, x_max, y_min, y_max] window,\n",
    "# then overlay the samples with one scatter call per class label (0, 1, 2).\n",
    "plot_decision_boundary(dt_clf, axis=[0.5, 7.5, 0, 3])\n",
    "plt.scatter(X[y==0,0], X[y==0,1])\n",
    "plt.scatter(X[y==1,0], X[y==1,1])\n",
    "plt.scatter(X[y==2,0], X[y==2,1])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 模拟使用信息熵进行划分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def split(X, y, d, value):\n",
    "    \"\"\"Partition samples by thresholding feature column `d` at `value`.\n",
    "\n",
    "    Returns (X_left, X_right, y_left, y_right): rows with X[:, d] <= value\n",
    "    go left, rows with X[:, d] > value go right.\n",
    "    \"\"\"\n",
    "    feature = X[:, d]\n",
    "    goes_left = feature <= value\n",
    "    goes_right = feature > value\n",
    "    X_left, X_right = X[goes_left], X[goes_right]\n",
    "    y_left, y_right = y[goes_left], y[goes_right]\n",
    "    return X_left, X_right, y_left, y_right"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "from math import log\n",
    "\n",
    "def entropy(y):\n",
    "    \"\"\"Return the Shannon entropy of the labels in `y`, in nats (natural log).\n",
    "\n",
    "    An empty `y` yields 0.0 because there are no label counts to sum over.\n",
    "    \"\"\"\n",
    "    label_counts = Counter(y)\n",
    "    total = len(y)\n",
    "    result = 0.0\n",
    "    for count in label_counts.values():\n",
    "        share = count / total\n",
    "        result -= share * log(share)\n",
    "    return result\n",
    "\n",
    "def try_split(X, y):\n",
    "    \"\"\"Exhaustively search for the single threshold split with the lowest entropy.\n",
    "\n",
    "    Every feature column is scanned in sorted order and the midpoint between\n",
    "    each pair of adjacent, distinct values is tried as a threshold. A candidate\n",
    "    is scored by the size-weighted sum of the entropies of its two sides.\n",
    "\n",
    "    Returns (best_entropy, best_d, best_v); if no candidate exists the result\n",
    "    is (inf, -1, -1).\n",
    "    \"\"\"\n",
    "    n_samples = len(X)\n",
    "    best = (float('inf'), -1, -1)\n",
    "    for d in range(X.shape[1]):\n",
    "        column = X[:, d]\n",
    "        order = np.argsort(column)\n",
    "        # Walk adjacent pairs of the sorted column.\n",
    "        for lower, upper in zip(order[:-1], order[1:]):\n",
    "            if column[upper] == column[lower]:\n",
    "                continue  # equal neighbours leave no threshold between them\n",
    "            v = (column[upper] + column[lower]) / 2\n",
    "            X_l, X_r, y_l, y_r = split(X, y, d, v)\n",
    "            weight_l = len(X_l) / n_samples\n",
    "            weight_r = len(X_r) / n_samples\n",
    "            weighted = weight_l * entropy(y_l) + weight_r * entropy(y_r)\n",
    "            if weighted < best[0]:\n",
    "                best = (weighted, d, v)\n",
    "\n",
    "    return best"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best_entropy = 0.46209812037329684\n",
      "best_d = 0\n",
      "best_v = 2.45\n"
     ]
    }
   ],
   "source": [
    "# Search the full dataset for the first split and report what was found.\n",
    "best_entropy, best_d, best_v = try_split(X, y)\n",
    "print(\"best_entropy =\", best_entropy)\n",
    "print(\"best_d =\", best_d)\n",
    "print(\"best_v =\", best_v)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply the first split: *_l holds rows at or below the threshold, *_r rows above it.\n",
    "X1_l, X1_r, y1_l, y1_r = split(X, y, best_d, best_v)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Entropy of the labels on the left side of the first split.\n",
    "entropy(y1_l)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.6931471805599453"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Entropy of the labels on the right side of the first split.\n",
    "entropy(y1_r)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best_entropy = 0.2147644654371359\n",
      "best_d = 1\n",
      "best_v = 1.75\n"
     ]
    }
   ],
   "source": [
    "# Search only the right side of the first split for a second split.\n",
    "best_entropy2, best_d2, best_v2 = try_split(X1_r, y1_r)\n",
    "print(\"best_entropy =\", best_entropy2)\n",
    "print(\"best_d =\", best_d2)\n",
    "print(\"best_v =\", best_v2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply the second split to the right side of the first split.\n",
    "X2_l, X2_r, y2_l, y2_r = split(X1_r, y1_r, best_d2, best_v2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.30849545083110386"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Entropy of the labels on the left side of the second split.\n",
    "entropy(y2_l)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10473243910508653"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Entropy of the labels on the right side of the second split.\n",
    "entropy(y2_r)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
